home *** CD-ROM | disk | FTP | other *** search
- //: C17:HTMLStripper.cpp
- // From Thinking in C++, 2nd Edition
- // Available at http://www.BruceEckel.com
- // (c) Bruce Eckel 1999
- // Copyright notice in Copyright.txt
- // Filter to remove html tags and markers
- #include "../require.h"
- #include <fstream>
- #include <iostream>
- #include <string>
- using namespace std;
-
// Returns a copy of s in which every occurrence of f has been replaced by r.
//
// Occurrences are located left to right and do not overlap. Text inserted
// by a replacement is never rescanned, so the call terminates even when r
// itself contains f (e.g. replacing " " with "  ").
//
//   s  the text to process (taken by value; the caller's string is untouched)
//   f  the substring to look for; if empty, s is returned unchanged
//   r  the text substituted for each occurrence of f
std::string replaceAll(std::string s, const std::string& f,
                       const std::string& r) {
  // An empty pattern matches at every position; there is nothing sensible
  // to replace, and looping on it would never end.
  if(f.empty())
    return s;
  // size_type rather than unsigned int: where size_type is wider than
  // unsigned int, a narrowed npos never compares equal to string::npos.
  std::string::size_type found = s.find(f);
  while(found != std::string::npos) {
    s.replace(found, f.length(), r);
    // Continue just past the inserted text instead of from the start.
    found = s.find(f, found + r.length());
  }
  return s;
}
-
- string stripHTMLTags(string s) {
- while(true) {
- unsigned int left = s.find('<');
- unsigned int right = s.find('>');
- if(left==string::npos || right==string::npos)
- break;
- s = s.erase(left, right - left + 1);
- }
- s = replaceAll(s, "<", "<");
- s = replaceAll(s, ">", ">");
- s = replaceAll(s, "&", "&");
- s = replaceAll(s, " ", " ");
- // Etc...
- return s;
- }
-
- int main(int argc, char* argv[]) {
- requireArgs(argc, 1,
- "usage: HTMLStripper InputFile");
- ifstream in(argv[1]);
- assure(in, argv[1]);
- const int sz = 4096;
- char buf[sz];
- while(in.getline(buf, sz)) {
- string s(buf);
- cout << stripHTMLTags(s) << endl;
- }
- } ///:~
-